Astroids Correlation Analysis¶

In [1]:
import pandas as pd
import matplotlib.pylab as plt
import numpy as np
import seaborn as sns
%matplotlib inline 
In [2]:
filename = r"Asteroid_Updated.csv"
In [3]:
df = pd.read_csv(filename)
C:\Users\DELL\anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3146: DtypeWarning: Columns (0,10,15,16,23,24) have mixed types.Specify dtype option on import or set low_memory=False.
  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
In [4]:
df.head(20)
Out[4]:
name a e i om w q ad per_y data_arc ... UB IR spec_B spec_T G moid class n per ma
0 Ceres 2.769165 0.076009 10.594067 80.305532 73.597694 2.558684 2.979647 4.608202 8822.0 ... 0.426 NaN C G 0.12 1.594780 MBA 0.213885 1683.145708 77.372096
1 Pallas 2.772466 0.230337 34.836234 173.080063 310.048857 2.133865 3.411067 4.616444 72318.0 ... 0.284 NaN B B 0.11 1.233240 MBA 0.213503 1686.155999 59.699133
2 Juno 2.669150 0.256942 12.988919 169.852760 248.138626 1.983332 3.354967 4.360814 72684.0 ... 0.433 NaN Sk S 0.32 1.034540 MBA 0.226019 1592.787285 34.925016
3 Vesta 2.361418 0.088721 7.141771 103.810804 150.728541 2.151909 2.570926 3.628837 24288.0 ... 0.492 NaN V V 0.32 1.139480 MBA 0.271609 1325.432765 95.861936
4 Astraea 2.574249 0.191095 5.366988 141.576605 358.687607 2.082324 3.066174 4.130323 63507.0 ... 0.411 NaN S S NaN 1.095890 MBA 0.238632 1508.600458 282.366289
5 Hebe 2.425160 0.203007 14.737901 138.640203 239.807490 1.932835 2.917485 3.776755 62329.0 ... 0.399 NaN S S 0.24 0.973965 MBA 0.260972 1379.459705 86.197923
6 Iris 2.385334 0.231206 5.523651 259.563231 145.265106 1.833831 2.936837 3.684105 62452.0 ... 0.484 NaN S S NaN 0.846100 MBA 0.267535 1345.619196 140.419656
7 Flora 2.201764 0.156499 5.886955 110.889330 285.287462 1.857190 2.546339 3.267115 62655.0 ... 0.489 NaN NaN S 0.28 0.874176 MBA 0.301681 1193.313717 194.882895
8 Metis 2.385637 0.123114 5.576816 68.908577 6.417369 2.091931 2.679342 3.684806 61821.0 ... 0.496 NaN NaN S 0.17 1.106910 MBA 0.267484 1345.875362 276.861623
9 Hygiea 3.141539 0.112461 3.831560 283.202167 312.315206 2.788240 3.494839 5.568291 62175.0 ... 0.351 NaN C C NaN 1.778390 MBA 0.177007 2033.818284 152.184851
10 Parthenope 2.453109 0.100472 4.629886 125.546585 195.550396 2.206640 2.699579 3.842232 61755.0 ... 0.417 NaN Sk S NaN 1.193220 MBA 0.256524 1403.375193 278.930692
11 Victoria 2.334315 0.220172 8.373074 235.410169 69.641819 1.820365 2.848265 3.566543 61769.0 ... 0.515 NaN L S 0.22 0.824953 MBA 0.276353 1302.679690 133.335892
12 Egeria 2.575981 0.085121 16.536125 43.221913 80.544823 2.356710 2.795252 4.134492 61680.0 ... 0.452 NaN Ch G NaN 1.436330 MBA 0.238391 1510.123380 187.488522
13 Irene 2.585567 0.166582 9.121646 86.122665 97.858985 2.154858 3.016277 4.157593 61526.0 ... 0.388 NaN S S NaN 1.179660 MBA 0.237067 1518.560847 164.935853
14 Eunomia 2.644100 0.186084 11.752430 292.934339 98.498681 2.152075 3.136126 4.299571 61247.0 ... 0.451 NaN S S 0.23 1.194850 MBA 0.229238 1570.418187 283.387698
15 Psyche 2.923814 0.133568 3.096005 150.045666 228.823071 2.533285 3.314343 4.999571 12856.0 ... 0.299 NaN X M 0.20 1.535800 MBA 0.197142 1826.093319 288.335893
16 Thetis 2.470354 0.133032 5.591205 125.552945 136.208250 2.141719 2.798989 3.882818 61117.0 ... 0.438 NaN Sl S NaN 1.129810 MBA 0.253843 1418.199204 303.364363
17 Melpomene 2.296654 0.217674 10.128731 150.383862 227.950847 1.796731 2.796576 3.480578 60906.0 ... 0.425 NaN S S 0.25 0.813258 MBA 0.283179 1271.281262 267.254381
18 Fortuna 2.442711 0.158047 1.573782 211.144044 182.065018 2.056648 2.828773 3.817827 60970.0 ... 0.324 NaN Ch G 0.10 1.062130 MBA 0.258164 1394.461340 197.338626
19 Massalia 2.409782 0.142067 0.708751 206.108911 256.773196 2.067432 2.752132 3.740889 59461.0 ... 0.463 NaN S S 0.25 1.084610 MBA 0.263474 1366.359575 117.695129

20 rows × 31 columns

In [5]:
# replace missing values with NaN
df.replace(" ", np.nan, inplace = True)
df.tail(5)
Out[5]:
name a e i om w q ad per_y data_arc ... UB IR spec_B spec_T G moid class n per ma
839709 NaN 2.812945 0.664688 4.695700 183.310012 234.618352 0.943214 4.682676 4.717914 17298.0 ... NaN NaN NaN NaN NaN 0.032397 APO 0.208911 1723.217927 156.905910
839710 NaN 2.645238 0.259376 12.574937 1.620020 339.568072 1.959126 3.331350 4.302346 16.0 ... NaN NaN NaN NaN NaN 0.956145 MBA 0.229090 1571.431965 13.366251
839711 NaN 2.373137 0.202053 0.732484 176.499082 198.026527 1.893638 2.852636 3.655884 5.0 ... NaN NaN NaN NaN NaN 0.893896 MBA 0.269600 1335.311579 355.351127
839712 NaN 2.260404 0.258348 9.661947 204.512448 148.496988 1.676433 2.844376 3.398501 10.0 ... NaN NaN NaN NaN NaN 0.680220 MBA 0.290018 1241.302609 15.320134
839713 NaN 2.546442 0.287672 5.356238 70.709555 273.483265 1.813901 3.278983 4.063580 11.0 ... NaN NaN NaN NaN NaN 0.815280 MBA 0.242551 1484.222588 20.432959

5 rows × 31 columns

In [6]:
missing_data = df.isnull()
missing_data.head(5)
Out[6]:
name a e i om w q ad per_y data_arc ... UB IR spec_B spec_T G moid class n per ma
0 False False False False False False False False False False ... False True False False False False False False False False
1 False False False False False False False False False False ... False True False False False False False False False False
2 False False False False False False False False False False ... False True False False False False False False False False
3 False False False False False False False False False False ... False True False False False False False False False False
4 False False False False False False False False False False ... False True False False True False False False False False

5 rows × 31 columns

In [7]:
#Checking for missing values in each column
for column in missing_data.columns.values.tolist():
    print(column)
    print (missing_data[column].value_counts())
    print("")    
name
True     817747
False     21967
Name: name, dtype: int64

a
False    839712
True          2
Name: a, dtype: int64

e
False    839714
Name: e, dtype: int64

i
False    839714
Name: i, dtype: int64

om
False    839714
Name: om, dtype: int64

w
False    839714
Name: w, dtype: int64

q
False    839714
Name: q, dtype: int64

ad
False    839708
True          6
Name: ad, dtype: int64

per_y
False    839713
True          1
Name: per_y, dtype: int64

data_arc
False    824240
True      15474
Name: data_arc, dtype: int64

condition_code
False    838847
True        867
Name: condition_code, dtype: int64

n_obs_used
False    839714
Name: n_obs_used, dtype: int64

H
False    837025
True       2689
Name: H, dtype: int64

neo
False    839708
True          6
Name: neo, dtype: int64

pha
False    823272
True      16442
Name: pha, dtype: int64

diameter
True     702078
False    137636
Name: diameter, dtype: int64

extent
True     839696
False        18
Name: extent, dtype: int64

albedo
True     703305
False    136409
Name: albedo, dtype: int64

rot_per
True     820918
False     18796
Name: rot_per, dtype: int64

GM
True     839700
False        14
Name: GM, dtype: int64

BV
True     838693
False      1021
Name: BV, dtype: int64

UB
True     838735
False       979
Name: UB, dtype: int64

IR
True     839713
False         1
Name: IR, dtype: int64

spec_B
True     838048
False      1666
Name: spec_B, dtype: int64

spec_T
True     838734
False       980
Name: spec_T, dtype: int64

G
True     839595
False       119
Name: G, dtype: int64

moid
False    823272
True      16442
Name: moid, dtype: int64

class
False    839714
Name: class, dtype: int64

n
False    839712
True          2
Name: n, dtype: int64

per
False    839708
True          6
Name: per, dtype: int64

ma
False    839706
True          8
Name: ma, dtype: int64

In [8]:
# Dropping all rows
df.dropna(subset=["diameter"], axis=0, inplace=True)

# reset index, because we some rows
df.reset_index(drop=True, inplace=True)
In [9]:
df.head()
Out[9]:
name a e i om w q ad per_y data_arc ... UB IR spec_B spec_T G moid class n per ma
0 Ceres 2.769165 0.076009 10.594067 80.305532 73.597694 2.558684 2.979647 4.608202 8822.0 ... 0.426 NaN C G 0.12 1.59478 MBA 0.213885 1683.145708 77.372096
1 Pallas 2.772466 0.230337 34.836234 173.080063 310.048857 2.133865 3.411067 4.616444 72318.0 ... 0.284 NaN B B 0.11 1.23324 MBA 0.213503 1686.155999 59.699133
2 Juno 2.669150 0.256942 12.988919 169.852760 248.138626 1.983332 3.354967 4.360814 72684.0 ... 0.433 NaN Sk S 0.32 1.03454 MBA 0.226019 1592.787285 34.925016
3 Vesta 2.361418 0.088721 7.141771 103.810804 150.728541 2.151909 2.570926 3.628837 24288.0 ... 0.492 NaN V V 0.32 1.13948 MBA 0.271609 1325.432765 95.861936
4 Astraea 2.574249 0.191095 5.366988 141.576605 358.687607 2.082324 3.066174 4.130323 63507.0 ... 0.411 NaN S S NaN 1.09589 MBA 0.238632 1508.600458 282.366289

5 rows × 31 columns

In [10]:
df.dtypes #Checking datatypes
Out[10]:
name               object
a                 float64
e                 float64
i                 float64
om                float64
w                 float64
q                 float64
ad                float64
per_y             float64
data_arc          float64
condition_code     object
n_obs_used          int64
H                 float64
neo                object
pha                object
diameter           object
extent             object
albedo            float64
rot_per           float64
GM                float64
BV                float64
UB                float64
IR                float64
spec_B             object
spec_T             object
G                 float64
moid              float64
class              object
n                 float64
per               float64
ma                float64
dtype: object
In [11]:
df[["diameter"]] = df[["diameter"]].astype(float) #Converting diameter datatype to float from string
In [12]:
df["diameter"].dtypes 
Out[12]:
dtype('float64')
In [13]:
#Deleting all columns with insufficient/irrelevant data
del df['name']
del df['extent']
del df['rot_per']
del df['GM']
del df['BV']
del df['UB']
del df['IR']
del df['spec_B']
del df['spec_T']
del df['G']
In [14]:
df.head()
Out[14]:
a e i om w q ad per_y data_arc condition_code ... H neo pha diameter albedo moid class n per ma
0 2.769165 0.076009 10.594067 80.305532 73.597694 2.558684 2.979647 4.608202 8822.0 0 ... 3.34 N N 939.400 0.0900 1.59478 MBA 0.213885 1683.145708 77.372096
1 2.772466 0.230337 34.836234 173.080063 310.048857 2.133865 3.411067 4.616444 72318.0 0 ... 4.13 N N 545.000 0.1010 1.23324 MBA 0.213503 1686.155999 59.699133
2 2.669150 0.256942 12.988919 169.852760 248.138626 1.983332 3.354967 4.360814 72684.0 0 ... 5.33 N N 246.596 0.2140 1.03454 MBA 0.226019 1592.787285 34.925016
3 2.361418 0.088721 7.141771 103.810804 150.728541 2.151909 2.570926 3.628837 24288.0 0 ... 3.20 N N 525.400 0.4228 1.13948 MBA 0.271609 1325.432765 95.861936
4 2.574249 0.191095 5.366988 141.576605 358.687607 2.082324 3.066174 4.130323 63507.0 0 ... 6.85 N N 106.699 0.2740 1.09589 MBA 0.238632 1508.600458 282.366289

5 rows × 21 columns

In [15]:
df.dtypes
Out[15]:
a                 float64
e                 float64
i                 float64
om                float64
w                 float64
q                 float64
ad                float64
per_y             float64
data_arc          float64
condition_code     object
n_obs_used          int64
H                 float64
neo                object
pha                object
diameter          float64
albedo            float64
moid              float64
class              object
n                 float64
per               float64
ma                float64
dtype: object
In [16]:
#Renaming all columns to make columns meaning more understandable
df.columns = ['semi_major_axis', 'eccentricity','Inclination','Longitude','perihelion_arg','perihelion_dis','aphelion_dist','Orbital_period_per_y','data_arc','condition_code','n_obs_used','Abs_Magnitude','neo','pha','diameter','albedo','moid','class','Mean_motion','orbital_Period_per','Mean_anomaly' ]
In [17]:
df.head()
Out[17]:
semi_major_axis eccentricity Inclination Longitude perihelion_arg perihelion_dis aphelion_dist Orbital_period_per_y data_arc condition_code ... Abs_Magnitude neo pha diameter albedo moid class Mean_motion orbital_Period_per Mean_anomaly
0 2.769165 0.076009 10.594067 80.305532 73.597694 2.558684 2.979647 4.608202 8822.0 0 ... 3.34 N N 939.400 0.0900 1.59478 MBA 0.213885 1683.145708 77.372096
1 2.772466 0.230337 34.836234 173.080063 310.048857 2.133865 3.411067 4.616444 72318.0 0 ... 4.13 N N 545.000 0.1010 1.23324 MBA 0.213503 1686.155999 59.699133
2 2.669150 0.256942 12.988919 169.852760 248.138626 1.983332 3.354967 4.360814 72684.0 0 ... 5.33 N N 246.596 0.2140 1.03454 MBA 0.226019 1592.787285 34.925016
3 2.361418 0.088721 7.141771 103.810804 150.728541 2.151909 2.570926 3.628837 24288.0 0 ... 3.20 N N 525.400 0.4228 1.13948 MBA 0.271609 1325.432765 95.861936
4 2.574249 0.191095 5.366988 141.576605 358.687607 2.082324 3.066174 4.130323 63507.0 0 ... 6.85 N N 106.699 0.2740 1.09589 MBA 0.238632 1508.600458 282.366289

5 rows × 21 columns

In [18]:
avg_Abs_Magnitude = df['Abs_Magnitude'].astype('float').mean(axis=0)
print("Abs_Magnitude Mean:", avg_Abs_Magnitude)
Abs_Magnitude Mean: 15.177041267011587
In [19]:
df['Abs_Magnitude'].replace(np.nan, avg_Abs_Magnitude, inplace=True)
In [20]:
avg_data_arc = df['data_arc'].astype('float').mean(axis=0)
avg_data_arc = int(avg_data_arc)
print("avg_data_arc Mean:", avg_data_arc)
avg_data_arc Mean: 8969
In [21]:
df['data_arc'].replace(np.nan, avg_data_arc, inplace=True)
In [22]:
avg_albedo = df['albedo'].astype('float').mean(axis=0)
print("avg_albedo Mean:", avg_albedo)
avg_albedo Mean: 0.13006564520622568
In [23]:
df['albedo'].replace(np.nan, avg_albedo, inplace=True)
In [24]:
missing_data = df.isnull()
missing_data.head(5)
for column in missing_data.columns.values.tolist():
    print(column)
    print (missing_data[column].value_counts())
    print("")    
semi_major_axis
False    137636
Name: semi_major_axis, dtype: int64

eccentricity
False    137636
Name: eccentricity, dtype: int64

Inclination
False    137636
Name: Inclination, dtype: int64

Longitude
False    137636
Name: Longitude, dtype: int64

perihelion_arg
False    137636
Name: perihelion_arg, dtype: int64

perihelion_dis
False    137636
Name: perihelion_dis, dtype: int64

aphelion_dist
False    137636
Name: aphelion_dist, dtype: int64

Orbital_period_per_y
False    137636
Name: Orbital_period_per_y, dtype: int64

data_arc
False    137636
Name: data_arc, dtype: int64

condition_code
False    137636
Name: condition_code, dtype: int64

n_obs_used
False    137636
Name: n_obs_used, dtype: int64

Abs_Magnitude
False    137636
Name: Abs_Magnitude, dtype: int64

neo
False    137636
Name: neo, dtype: int64

pha
False    137636
Name: pha, dtype: int64

diameter
False    137636
Name: diameter, dtype: int64

albedo
False    137636
Name: albedo, dtype: int64

moid
False    137636
Name: moid, dtype: int64

class
False    137636
Name: class, dtype: int64

Mean_motion
False    137636
Name: Mean_motion, dtype: int64

orbital_Period_per
False    137636
Name: orbital_Period_per, dtype: int64

Mean_anomaly
False    137636
Name: Mean_anomaly, dtype: int64

In [25]:
#This will convert the dataframe into a csv file named 'astroid_cleansed'
#df.to_csv('astroid_cleansed.csv')
In [26]:
df.corr()
Out[26]:
semi_major_axis eccentricity Inclination Longitude perihelion_arg perihelion_dis aphelion_dist Orbital_period_per_y data_arc n_obs_used Abs_Magnitude diameter albedo moid Mean_motion orbital_Period_per Mean_anomaly
semi_major_axis 1.000000 0.021343 0.148705 -0.000675 -0.002379 0.366541 0.986158 0.941359 -0.019646 -0.049336 -0.132610 0.144736 -0.110168 0.369826 -0.279518 0.941359 0.014283
eccentricity 0.021343 1.000000 0.144882 -0.000398 0.012354 -0.514929 0.114209 0.048275 -0.028362 -0.076498 0.198676 -0.049133 -0.019264 -0.490871 0.189318 0.048275 -0.018082
Inclination 0.148705 0.144882 1.000000 -0.012863 -0.004291 0.085951 0.141043 0.096304 -0.199472 -0.224616 -0.033771 0.052609 -0.089066 0.125247 -0.108291 0.096304 0.015262
Longitude -0.000675 -0.000398 -0.012863 1.000000 -0.106991 -0.002965 -0.000181 0.000367 -0.000501 -0.023611 0.002801 0.001164 0.000739 -0.003694 0.008053 0.000367 -0.003253
perihelion_arg -0.002379 0.012354 -0.004291 -0.106991 1.000000 -0.006678 -0.001312 -0.001621 -0.005570 0.010236 -0.008439 0.002966 -0.003027 -0.006532 0.002438 -0.001621 0.001528
perihelion_dis 0.366541 -0.514929 0.085951 -0.002965 -0.006678 1.000000 0.207199 0.109322 -0.017597 -0.080338 -0.374466 0.329703 -0.261687 0.996821 -0.706225 0.109322 0.071083
aphelion_dist 0.986158 0.114209 0.141043 -0.000181 -0.001312 0.207199 1.000000 0.970338 -0.017522 -0.037559 -0.072703 0.093430 -0.069204 0.211219 -0.168049 0.970338 0.002351
Orbital_period_per_y 0.941359 0.048275 0.096304 0.000367 -0.001621 0.109322 0.970338 1.000000 -0.007837 -0.011021 -0.035556 0.048953 -0.019492 0.110897 -0.059904 1.000000 -0.005284
data_arc -0.019646 -0.028362 -0.199472 -0.000501 -0.005570 -0.017597 -0.017522 -0.007837 1.000000 0.755441 -0.670998 0.491580 0.254523 -0.025349 0.039911 -0.007837 -0.017377
n_obs_used -0.049336 -0.076498 -0.224616 -0.023611 0.010236 -0.080338 -0.037559 -0.011021 0.755441 1.000000 -0.782006 0.385747 0.448285 -0.090066 0.127482 -0.011021 -0.047139
Abs_Magnitude -0.132610 0.198676 -0.033771 0.002801 -0.008439 -0.374466 -0.072703 -0.035556 -0.670998 -0.782006 1.000000 -0.568493 -0.240790 -0.370004 0.328595 -0.035556 -0.006041
diameter 0.144736 -0.049133 0.052609 0.001164 0.002966 0.329703 0.093430 0.048953 0.491580 0.385747 -0.568493 1.000000 -0.107334 0.332423 -0.201023 0.048953 0.009659
albedo -0.110168 -0.019264 -0.089066 0.000739 -0.003027 -0.261687 -0.069204 -0.019492 0.254523 0.448285 -0.240790 -0.107334 1.000000 -0.266281 0.343051 -0.019492 -0.044653
moid 0.369826 -0.490871 0.125247 -0.003694 -0.006532 0.996821 0.211219 0.110897 -0.025349 -0.090066 -0.370004 0.332423 -0.266281 1.000000 -0.692139 0.110897 0.072123
Mean_motion -0.279518 0.189318 -0.108291 0.008053 0.002438 -0.706225 -0.168049 -0.059904 0.039911 0.127482 0.328595 -0.201023 0.343051 -0.692139 1.000000 -0.059904 -0.063535
orbital_Period_per 0.941359 0.048275 0.096304 0.000367 -0.001621 0.109322 0.970338 1.000000 -0.007837 -0.011021 -0.035556 0.048953 -0.019492 0.110897 -0.059904 1.000000 -0.005284
Mean_anomaly 0.014283 -0.018082 0.015262 -0.003253 0.001528 0.071083 0.002351 -0.005284 -0.017377 -0.047139 -0.006041 0.009659 -0.044653 0.072123 -0.063535 -0.005284 1.000000
In [27]:
#Checking correlation of diameter with other features
df[df.columns[1:]].corr()['diameter'][:]
Out[27]:
eccentricity           -0.049133
Inclination             0.052609
Longitude               0.001164
perihelion_arg          0.002966
perihelion_dis          0.329703
aphelion_dist           0.093430
Orbital_period_per_y    0.048953
data_arc                0.491580
n_obs_used              0.385747
Abs_Magnitude          -0.568493
diameter                1.000000
albedo                 -0.107334
moid                    0.332423
Mean_motion            -0.201023
orbital_Period_per      0.048953
Mean_anomaly            0.009659
Name: diameter, dtype: float64
In [28]:
df[["n_obs_used", "diameter"]].corr()
Out[28]:
n_obs_used diameter
n_obs_used 1.000000 0.385747
diameter 0.385747 1.000000
In [29]:
f, ax = plt.subplots(figsize=(12, 5))
c1 = sns.regplot(x="diameter", y="eccentricity", data=df, ax=ax)
plt.ylim(0,)
c1.set_title('Eccentricity Correlation with Diameter', fontdict={'fontsize':18}, pad=16)
Out[29]:
Text(0.5, 1.0, 'Eccentricity Correlation with Diameter')
In [30]:
f, ax = plt.subplots(figsize=(12, 5))
c2 = sns.regplot(x="diameter", y="Inclination", data=df, ax=ax)
plt.ylim(0,)
c2.set_title('Inclination Correlation with Diameter', fontdict={'fontsize':18}, pad=16)
Out[30]:
Text(0.5, 1.0, 'Inclination Correlation with Diameter')
In [31]:
f, ax = plt.subplots(figsize=(12, 5))
c3 = sns.regplot(x="diameter", y="Longitude", data=df, ax=ax)
plt.ylim(0,)
c3.set_title('Longitude Correlation with Diameter', fontdict={'fontsize':18}, pad=16)
Out[31]:
Text(0.5, 1.0, 'Longitude Correlation with Diameter')
In [32]:
f, ax = plt.subplots(figsize=(12, 5))
c4 = sns.regplot(x="diameter", y="perihelion_arg", data=df, ax=ax)
plt.ylim(0,)
c4.set_title('Perihelion_arg Correlation with Diameter', fontdict={'fontsize':18}, pad=16)
Out[32]:
Text(0.5, 1.0, 'Perihelion_arg Correlation with Diameter')
In [33]:
f, ax = plt.subplots(figsize=(12, 5))
c5 = sns.regplot(x="diameter", y="perihelion_dis", data=df, ax=ax)
plt.ylim(0,)
c5.set_title('Periherlion_dis Correlation with Diameter', fontdict={'fontsize':18}, pad=16)
Out[33]:
Text(0.5, 1.0, 'Periherlion_dis Correlation with Diameter')
In [34]:
f, ax = plt.subplots(figsize=(12, 5))
c6 = sns.regplot(x="diameter", y="aphelion_dist", data=df, ax=ax)
plt.ylim(0,)
c6.set_title('Aphelion_dist Correlation with Diameter', fontdict={'fontsize':18}, pad=16)
Out[34]:
Text(0.5, 1.0, 'Aphelion_dist Correlation with Diameter')
In [35]:
f, ax = plt.subplots(figsize=(12, 5))
c7 = sns.regplot(x="diameter", y="Orbital_period_per_y", data=df, ax=ax)
plt.ylim(0,)
c7.set_title('Orbital_Period_per_y Correlation with Diameter', fontdict={'fontsize':18}, pad=16)
Out[35]:
Text(0.5, 1.0, 'Orbital_Period_per_y Correlation with Diameter')
In [36]:
f, ax = plt.subplots(figsize=(12, 5))
c8 = sns.regplot(x="diameter", y="data_arc", data=df, ax=ax)
plt.ylim(0,)
c8.set_title('Data_arc Correlation with Diameter', fontdict={'fontsize':18}, pad=16)
Out[36]:
Text(0.5, 1.0, 'Data_arc Correlation with Diameter')
In [37]:
f, ax = plt.subplots(figsize=(12, 5))
c9 = sns.regplot(x="diameter", y="n_obs_used", data=df, ax=ax)
plt.ylim(0,)
c9.set_title('n_obs_used Correlation with Diameter', fontdict={'fontsize':18}, pad=16)
Out[37]:
Text(0.5, 1.0, 'n_obs_used Correlation with Diameter')
In [38]:
f, ax = plt.subplots(figsize=(12, 5))
sns.regplot(x="diameter", y="Abs_Magnitude", data=df, ax=ax)
plt.ylim(0,)
c10.set_title('Abs_Magnitude Correlation with Diameter', fontdict={'fontsize':18}, pad=16)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-38-e9040efe2f6e> in <module>
      2 sns.regplot(x="diameter", y="Abs_Magnitude", data=df, ax=ax)
      3 plt.ylim(0,)
----> 4 c10.set_title('Abs_Magnitude Correlation with Diameter', fontdict={'fontsize':18}, pad=16)

NameError: name 'c10' is not defined
In [46]:
f, ax = plt.subplots(figsize=(12, 5))
c11 = sns.regplot(x="diameter", y="albedo", data=df, ax=ax)
plt.ylim(0,)
c11.set_title('albedo Correlation with Diameter', fontdict={'fontsize':18}, pad=16)
Out[46]:
Text(0.5, 1.0, 'albedo Correlation with Diameter')
In [39]:
f, ax = plt.subplots(figsize=(12, 5))
c12 = sns.regplot(x="diameter", y="moid", data=df, ax=ax)
plt.ylim(0,)
c12.set_title('Moid Correlation with Diameter', fontdict={'fontsize':18}, pad=16)
Out[39]:
Text(0.5, 1.0, 'Moid Correlation with Diameter')
In [40]:
f, ax = plt.subplots(figsize=(12, 5))
c13 = sns.regplot(x="diameter", y="Mean_motion", data=df, ax=ax)
plt.ylim(0,)
c13.set_title('Mean_motion Correlation with Diameter', fontdict={'fontsize':18}, pad=16)
Out[40]:
Text(0.5, 1.0, 'Mean_motion Correlation with Diameter')
In [41]:
f, ax = plt.subplots(figsize=(12, 5))
c14 = sns.regplot(x="diameter", y="Mean_anomaly", data=df, ax=ax)
plt.ylim(0,)
c14.set_title('Mean_Anomaly Correlation with Diameter', fontdict={'fontsize':18}, pad=16)
Out[41]:
Text(0.5, 1.0, 'Mean_Anomaly Correlation with Diameter')
In [42]:
f, ax = plt.subplots(figsize=(12, 5))
c15 = sns.regplot(x="diameter", y="orbital_Period_per", data=df, ax=ax)
plt.ylim(0,)
c15.set_title('Eccentricity Correlation with Diameter', fontdict={'fontsize':18}, pad=16)
Out[42]:
Text(0.5, 1.0, 'Eccentricity Correlation with Diameter')
In [43]:
plt.figure(figsize=(16, 6))
heatmap = sns.heatmap(df.corr(), vmin=-1, vmax=1, annot=True, cmap='BrBG')
heatmap.set_title('Correlation Heatmap', fontdict={'fontsize':18}, pad=12)
#plt.savefig('heatmap.png', dpi=300, bbox_inches='tight')
Out[43]:
Text(0.5, 1.0, 'Correlation Heatmap')
In [44]:
np.triu(np.ones_like(df.corr()))
plt.figure(figsize=(16, 6))
mask = np.triu(np.ones_like(df.corr(), dtype=np.bool))
heatmap = sns.heatmap(df.corr(), mask=mask, vmin=-1, vmax=1, annot=True, cmap='BrBG')
heatmap.set_title('Triangle Correlation Heatmap', fontdict={'fontsize':18}, pad=16);
In [45]:
plt.figure(figsize=(4, 10))
heatmap = sns.heatmap(df.corr()[['diameter']].sort_values(by='diameter', ascending=False), vmin=-1, vmax=1, annot=True, cmap='BrBG')
heatmap.set_title('Features Correlating with Diameter', fontdict={'fontsize':18}, pad=16)
plt.savefig('heatmap.png', dpi=300, bbox_inches='tight')
Out[45]:
Text(0.5, 1.0, 'Features Correlating with Diameter')